#!/usr/bin/env python
#coding:utf-8

from core import global_setting
import redis_connector as rediscon
import json
from conf import hosts
import time

# Metric names for which check_data() takes the "less than" comparison
# branch against the trigger thresholds.
lt_operator = ['idle', 'nice']
# Metric names that check_data() never evaluates (it returns 0 for them).
longstr_operator = ['uptime', 'host_status']

# Hostnames that check() skips entirely.
failurehost = ('duanhq',)


def update_all_hosts_configuration():
    """Build the initial error-tracking dictionary from the host config.

    Returns:
        A dict shaped as
        ``{hostname: {'services': {service_name: {name: [0, 0, []]}}}}``
        where ``name`` is ``'lastime'`` plus every trigger name of the
        service. Each ``[0, 0, []]`` record holds, in order: the time of
        the last data, the number of errors, and the list of error times.
    """
    registry = {}
    for host in hosts.monitored_hosts:
        per_service = {}
        for name, service in host.services.items():
            # 'lastime' tracks the last message received from the client
            # service; every trigger gets its own record next to it.
            records = {'lastime': [0, 0, []]}
            for trigger_name in service.triggers:
                records[trigger_name] = [0, 0, []]
            per_service[name] = records
        registry[host.hostname] = {'services': per_service}

    return registry


def check_data(index, val, service_data):
    """Report whether one metric of a service sample is out of bounds.

    Args:
        index: Metric name; used as the key into ``service_data[1]``.
        val: Trigger definition of the metric. Only ``val[1]`` and
            ``val[2]`` are read here, as the two thresholds.
        service_data: Decoded sample; ``service_data[1]`` maps metric
            names to values (numbers or numeric strings).

    Returns:
        1 when the value violates the thresholds, otherwise 0. Metrics
        listed in ``longstr_operator`` are never evaluated and yield 0.

    Raises:
        KeyError: if ``index`` is missing from ``service_data[1]``.
        ValueError: if a string value cannot be parsed as a float.
    """
    if index in longstr_operator:
        return 0

    raw = service_data[1][index]
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # text values are recognised on both without naming ``unicode``, which
    # does not exist on Python 3.
    if isinstance(raw, (str, type(u''))):
        index_val = float(raw.strip())
    else:
        index_val = raw

    if index in lt_operator:
        # Violation when the value is below either threshold.
        out_of_bounds = index_val < val[1] or index_val < val[2]
    else:
        # NOTE(review): values at or below val[1] are also reported as
        # violations here; this presumes val[1] is a lower bound and
        # val[2] an upper bound -- confirm against the trigger config.
        out_of_bounds = index_val <= val[1] or index_val >= val[2]

    return 1 if out_of_bounds else 0


def check_error(service_name, service_data, current, h, v,  N, T):
    """Record threshold violations of one sample and alert on bursts.

    For every trigger of the service, check_data() decides whether the
    sample violates the thresholds. Each violation increments the
    metric's error counter and stores the sample time. An alert is
    printed when at least ``N`` violations lie within ``T + v.interval``
    seconds of the newest one.

    Args:
        service_name: Name of the service, used in the alert text.
        service_data: Decoded sample; ``service_data[0]`` is its time and
            ``service_data[1]`` the metric mapping passed to check_data().
        current: Per-service record dict; ``current[key]`` is a
            ``[last_time, error_count, error_times]`` list that is
            updated in place.
        h: Host object; only ``h.hostname`` is read.
        v: Service object; ``v.triggers`` and ``v.interval`` are read.
        N: Number of violations required to raise an alert.
        T: Maximum gap in seconds between two consecutive violations,
            and base length of the alert window.
    """
    sample_time = service_data[0]
    window = T + v.interval

    for key, val in v.triggers.items():
        if not check_data(key, val, service_data):
            # No new violation: the stored history is unchanged, so
            # evaluating it again would only repeat an earlier alert for
            # a healthy sample.
            continue

        record = current[key]
        record[1] += 1
        record[2].append(sample_time)
        if record[1] < N:
            continue

        stamps = record[2]
        # With a single stored time (possible when N <= 1) there is no
        # previous violation to measure a gap against.
        time2 = stamps[-1] - stamps[-2] if len(stamps) > 1 else 0

        # The previous violation is too old: restart the history from
        # this sample.
        if time2 > T:
            record[1] = 1
            stamps = record[2] = [sample_time]
            if N > 1:
                continue
            # A lone violation already satisfies N <= 1; evaluate it now.
            time2 = 0

        # Drop every violation that fell out of the window, keeping the
        # counter in step with the list. Dropping only one entry per call
        # would suppress alerts during a sustained outage.
        while len(stamps) > 1 and int(stamps[-1] - stamps[0]) > window:
            stamps.pop(0)
            record[1] -= 1

        time1 = stamps[-1] - stamps[0]
        print(time1, time2, window)
        if record[1] >= N:
            print('\033[35m>>>>Error@%s>>>>%s::%s@\033[0m'
                % (time.ctime(), h.hostname,service_name))
            print('\033[31m%s has %d error times in %d secs!\033[0m'
                % (key, record[1], time1))


def check():
    """Fetch the latest sample of every monitored service and evaluate it.

    For each host in ``hosts.monitored_hosts`` (except those listed in
    ``failurehost``) and each of its services, the value stored in Redis
    under ``'<hostname>::<service_name>'`` is read and JSON-decoded into
    ``[sample_time, payload]``. Results are printed; nothing is returned.

    Reads the module-level ``host_dic`` created by the script entry point
    and updates its per-service records in place.
    """
    for h in hosts.monitored_hosts:
        if h.hostname in failurehost:
            continue
        #print('\033[32m======= %s ========\033[0m' % h.hostname)
        for service_name, v in h.services.items():
            service_key = '%s::%s' % (h.hostname, service_name)
            service_data = rediscon.r.get(service_key)

            # Alias of the bookkeeping records of this service.
            current = host_dic[h.hostname]['services'][service_name]

            if service_data is None:
                # No data from the client: the service is not running
                # there.
                print('\033[31mError>>%s::%s is not valid\033[0m'
                        % (h.hostname, service_name))
                continue

            service_data = json.loads(service_data)
            time_pass = time.time() - service_data[0]

            # Record a failure when the newest sample is overdue.
            # NOTE(review): a stale sample is appended again on every
            # poll, so this list grows for as long as the client stays
            # silent -- confirm whether it should be bounded.
            if time_pass >= v.interval + 10:
                current['lastime'][1] += 1
                current['lastime'][2].append(service_data[0])

            # Skip samples that were already processed; otherwise remember
            # the time of this one.
            if service_data[0] == current['lastime'][0]:
                continue
            current['lastime'][0] = service_data[0]

            # Alert frequency of this service (count and time span). Bound
            # before branching because both branches below need N.
            N = v.failurecount
            T = v.failuretime

            if service_data[1]['status'] == 0:
                # Inspect the metrics and record violations.
                check_error(service_name, service_data, current,
                            h, v, N, T)
            else:
                # Non-zero status code: record one execution failure.
                current['lastime'][1] += 1
                current['lastime'][2].append(service_data[0])
                if current['lastime'][1] >= N:
                    print('\033[31mError>>%s::%s is unvalid\033[0m'
                        % (h.hostname, service_name))


if __name__ == '__main__':
    # Collect the configuration and initialise the error records. The name
    # must stay module-level: check() reads host_dic as a global.
    host_dic = update_all_hosts_configuration()
    print(host_dic)

    # Fetch the records from Redis every 5 seconds and analyse them.
    while True:
        check()
        time.sleep(5)
